# Boyoon Lee "The Impact of Educational Content on Anti-Immigrant Attitudes"
# 1. Main results (Table 2, Table 3, Table 4, Figure 2)
# Last updated: 2022-11-16


# Initial settings --------------------------------------------------------

# Set directory
setwd("Your path here")

# Packages
library(ggplot2) 
library(miceadds) # for lm.cluster
library(tidyr)
library(dplyr)
library(descr)
library(stargazer)


#######################################################
##            Preparation before analysis            ##
#######################################################


# Load data  --------------------------------------------------------------

### Original data
data <- read.csv("./cleaned_TSCS_did.csv", header=TRUE, sep=",")


### Matched data
# Data set saved from Appendix D (Matching) results.
# See file "4_Online_Appendix_D.R" for the construction
load("./matched_data_Ver3_5_1.Rda")


# Recode / Re-class  ------------------------------------------------------

### Make date variable using Birth year and Birth month
# Since there is no date of birth available, I set the date to the first day of the month
data$date <- as.Date(with(data, paste(birth_yr, birth_month, "01", sep="-")), "%Y-%m-%d")

### Only leave school cohorts before 2002 
# In 2001, Knowing Taiwan series discontinued and replaced with the New Grade 1-9 curriculum (9 year curriculum)
# In addition, from 1968, junior vocational school shut down and expanded senior vocational school
data<-subset(data, data$school_yr<2002&data$school_yr>1967)


# Subsets for the Comparison Group ----------------------------------

# Main comparison group 
yr13_22<-subset(data,school_yr>1987 & school_yr<2002)
yr13_22<-subset(yr13_22,edu_level>3) # those who graduated from at least junior high schools



##########################################
##            Main Analysis             ##
##########################################
# Using lm.cluster() yields the same results as STATA option of 
# "[model], vce (cluster [cluster var])" or "[model], r cluster ([cluster var])"  or "[model], cl([cluster var])"

### Create cluster variable
clus1<-as.integer(yr13_22$school_yr)*as.integer(yr13_22$group_plus)


# Table 2  ------------------------------------------------------

### (1): Original data
# The analysis is based on both pre- and post-treatment controls
# See Appendix E for the analysis based only on pre-treatment controls
did.cl.c2 <- lm.cluster(data= yr13_22, 
                        formula= num_imm ~ post + group + TG
                        + female + married + employed + social_class + edu_uni
                        + post*prts_gen 
                        + as.factor(edu_level) + as.factor(loc_birth) + as.factor(school_yr) + as.factor(year), 
                        cluster = clus1)
summary(did.cl.c2)


### (2): Matched data 
psm.cl.2 <- lm.cluster(data= dta_m,
                       formula= num_imm ~ post*group
                       + female + married + employed + social_class+ edu_uni
                       + post*prts_gen 
                       + as.factor(loc_birth)+ as.factor(edu_level) +as.factor(school_yr) + as.factor(year),
                       cluster = dta_m$school_yr*dta_m$group_plus)
summary(psm.cl.2)


# Figure 2  ------------------------------------------------------

### (Left): Trends in anti-immigrant attitudes

# Number of Immigrants (Dummy)
noNA.immdum.tw<-yr13_22[!is.na(yr13_22$num_imm_dummy)&!is.na(yr13_22$group),]
prop_immdum.tw<-noNA.immdum.tw %>% group_by(date,group,num_imm_dummy) %>% summarise (n=n()) %>% mutate(proportion=n/sum(n))
prop_immd_tw<-subset(prop_immdum.tw,prop_immdum.tw$num_imm_dummy==1)
# Rolling average
rolling <- prop_immd_tw %>%
  dplyr::arrange(desc(group)) %>% 
  dplyr::group_by(group) %>% 
  dplyr::mutate(prop_03 = zoo::rollmean(proportion, k = 3, fill = NA),
                prop_06 = zoo::rollmean(proportion, k = 6, fill = NA),
                prop_12 = zoo::rollmean(proportion, k = 12, fill = NA)) %>% 
  dplyr::ungroup()

### Plot the rolling average
voc<-subset(rolling, rolling$group==0)
gen<-subset(rolling, rolling$group==1)
# Plot
yr<-c(1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001)
dates<-c(as.Date("1976-09-01"),voc$date[grep("-09-01",voc$date)][1:9],
         as.Date("1986-09-01"),voc$date[grep("-09-01",voc$date)][10],as.Date("1988-09-01"))
par(mar=c(7, 5, 1, 1.5) + 1)
plot(voc$date,voc$prop_12, ylim=range(voc$prop_12,gen$prop_12,na.rm=T),main = "",
     ylab = "Proportion of Preference for \nIncreased Immigrants",
     pch=20, col="grey60",cex.lab=1.5,cex.axis=1.2, xaxt="n",xlab="",cex=1.5)
axis(1,dates,labels=yr, cex.axis=1.2,mgp=c(2,1,0),las=2)
lines(voc$date, voc$prop_12,col="grey60", lty=2)
points(gen$date, gen$prop_12, pch=16, col="black", cex=1)
lines(gen$date, gen$prop_12,col="black")
mtext("Year Entered into Junior High School",side=1,line=4.1,cex=1.5)
abline(v=as.numeric(as.Date("1984-09-01")),lty=2,col="red",xpd=FALSE)
par(xpd=TRUE)
# Add a box to show phase-in period
rect(as.numeric(as.Date("1984-09-01")),0.135,as.numeric(as.Date("1990-01-05")),0.424, border = NA, col= '#00000011')
legend(as.numeric(as.Date("1976-09-01")), 0.055, legend=c("Vocational path","Academic path"),
       col=c("grey60","black"), lwd=2, lty=c(2,1), pch=c(20,16), cex=1.5, box.lty=0,horiz = TRUE)


### (Right): Placebo reform

# Pre-1997
lmc.coef<-c()
lmc.std.err<-c()
for (i in 1:8){
  # subset the data, including only years before 1997 (actual treatment period)
  pre = yr13_22[yr13_22$school_yr<= 1996,]
  # subset the data, including only years after 1997 
  # Create a new "after treatment" dummy variable and interaction term
  pre$post = as.numeric(pre$school_yr > 1987+i)
  pre$int <- pre$post*pre$group
  # run a placebo with lm.cluster: group & cohort
  clus<-pre$school_yr*pre$group_plus
  did.lmc<-lm.cluster(num_imm ~ post + group + int
                      + female + married + employed + social_class + edu_uni 
                      + post*prts_gen
                      + as.factor(school_yr) +  as.factor(year)  + as.factor(loc_birth) + as.factor(edu_level)
                      , cluster=clus, data=pre)
  lmc.coef[i]<-summary(did.lmc)[4,1]
  lmc.std.err[i]<-summary(did.lmc)[4,2]
}
# Post-1997
lmc.coef.post<-c()
lmc.std.err.post<-c()
for (i in 1:5){
  post<-yr13_22
  # Create a new "after treatment" dummy variable and interaction term
  post$post = as.numeric(post$school_yr > 1995+i)
  post$int <- post$post*post$group
  # run a placebo with lm.cluster: group & cohort
  clus<-post$school_yr*post$group_plus
  did.lmc<-lm.cluster(num_imm ~ post + group + int
                      + female + married + employed + social_class + edu_uni 
                      +  post*prts_gen
                      + as.factor(school_yr) + as.factor(year) + as.factor(loc_birth) + as.factor(edu_level)
                      , cluster=clus, data=post)
  lmc.coef.post[i]<-summary(did.lmc)[4,1]
  lmc.std.err.post[i]<-summary(did.lmc)[4,2]
}

placebo.pre<-rbind(lmc.coef,lmc.std.err)
placebo.post<-rbind(lmc.coef.post,lmc.std.err.post)
placebo<-t(cbind(placebo.pre,placebo.post))
placebo<-cbind(placebo,placebo[,1] - placebo[,2]*(qnorm(0.975)))
placebo<-cbind(placebo,placebo[,1] + placebo[,2]*(qnorm(0.975)))


# Plot the placebo
par(mar=c(6, 5, 1, 1.5) + 1)
plot(1:13,placebo[,1],ylim=c(-0.5,0.5),xlim=c(0.5, 13.5),main = "",
     ylab = "Difference in Preference for \n Increased Immigrants",
     pch=16, col="black",cex.lab=1.5,cex.axis=1.2, xaxt="n",xlab="",cex=1.5)
lines(1:13,placebo[,1],col="black")
axis(1,1:13,labels=seq(1989,2001,1),cex.axis=1.2,mgp=c(2,1,0),las=2)
mtext("Hypothetical Introduction Year of \n the Renshi Taiwan Textbooks",side=1,line=5.5,cex=1.5)
abline(h=0,lty=2,col="red")
abline(v=9,lty=2,col="red")
for(i in 1:13) {
  segments(i,placebo[i,3],i,placebo[i,4],col="black",lwd=2)
} 
arrows(1:13, placebo[,3], 1:13, placebo[,4], code=3, angle=90, length=0.1)
par(xpd=TRUE)
# Add a box to show phase-in period
rect(9,-0.54,14,0.54, border = NA, col= '#00000011')



###################################
#           China Story?          #
###################################


# Table 3  ------------------------------------------------------

# Create Cluster variable
yr2013<-subset(yr13_22,yr13_22$year==2013)
clus2013<-as.integer(yr2013$school_yr)*as.integer(yr2013$group_plus)


### Immigrants from China
china.cl <- lm.cluster(data= yr2013, 
                       formula= citizen_from_china ~ post + group + TG 
                       + female + married + employed + social_class+ edu_uni
                       + post*prts_gen 
                       + as.factor(edu_level) + as.factor(loc_birth) + as.factor(school_yr), 
                       cluster = clus2013)
summary(china.cl)
# Number of obs
nrow(china.cl$lm_res$model)


### Immigrants from Southeast Asia
SEA.cl <- lm.cluster(data= yr2013, 
                     formula= citizen_from_SEA ~ post + group + TG 
                     + female + married + employed + social_class+ edu_uni
                     + post*prts_gen 
                     + as.factor(edu_level) + as.factor(loc_birth) + as.factor(school_yr), 
                     cluster = clus2013)
summary(SEA.cl)
# Number of obs
nrow(SEA.cl$lm_res$model)

### Immigrants from Europe and America
west.cl <- lm.cluster(data= yr2013, 
                      formula= citizen_from_west ~ post + group + TG 
                      + female + married + employed + social_class+ edu_uni
                      + post*prts_gen #+ post*stu_gen_cagr
                      + as.factor(edu_level) + as.factor(loc_birth) + as.factor(school_yr), 
                      cluster = clus2013)
summary(west.cl)
# Number of obs
nrow(west.cl$lm_res$model)



###################################
#         Mechanism Test          #
###################################

# Table 4  ------------------------------------------------------

### Identify as Taiwanese
NI.identity <- lm.cluster(data= yr13_22, 
                       formula= identify ~ post + group + TG
                       + female + married + employed + social_class+ edu_uni
                       + post*prts_gen
                       + as.factor(edu_level) + as.factor(loc_birth) + as.factor(school_yr), 
                       cluster = clus1)
summary(NI.identity)


### Feel close to Taiwan
NI.close <- lm.cluster(data= yr13_22, 
                       formula= close_taiwan ~ post + group + TG
                       + female + married + employed + social_class+ edu_uni
                       + post*prts_gen
                       + as.factor(edu_level) + as.factor(loc_birth) + as.factor(school_yr), 
                       cluster = clus1)
summary(NI.close)


### Exclusive national identity scale ("or")
# Lower values representing more exclusive identity
NI.exclusive1<-lm.cluster(data= yr13_22, 
                          formula= scale_exclusive_or2 ~ post + group + TG
                          + female + married + employed + social_class+ edu_uni
                          + post*prts_gen
                          + as.factor(edu_level) + as.factor(loc_birth) + as.factor(school_yr), 
                          cluster = clus1)
summary(NI.exclusive1)


### Exclusive national identity scale ("and")
NI.exclusive2<-lm.cluster(data= yr13_22, 
                          formula= scale_exclusive_and2 ~ post + group + TG
                          + female + married + employed + social_class+ edu_uni
                          + post*prts_gen
                          + as.factor(edu_level) + as.factor(loc_birth) + as.factor(school_yr), 
                          cluster = clus1)
summary(NI.exclusive2)
